package cn.galudisu.spark._1_dataframes

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

/**
  * 从case classes 中创建Spark Dataframe，以及进行SQL-Like操作
  *
  * @author galudisu
  */
/**
  * Demonstrates creating Spark DataFrames from case classes and from Seqs of
  * tuples, then inspecting schemas, renaming columns, and running SQL-like
  * queries against a temp view.
  */
object DataFrameFromCaseClasses {

  def main(args: Array[String]): Unit = {
    // Build the session directly: SparkSession supersedes the legacy SQLContext
    // (the original obtained and used `sqlContext` from the session).
    val conf  = new SparkConf().setAppName("colRowDataFrame").setMaster("local[2]")
    val spark = SparkSession.builder().config(conf).getOrCreate()

    try {
      // In real code this would come from an external source such as an RDBMS;
      // a hard-coded list keeps the example self-contained.
      val listOfEmployees = List(Employee(1, "Arun"), Employee(2, "Jason"), Employee(3, "Abhi"))

      // Derive a DataFrame from the case-class list; the schema (column names
      // and types) is inferred from the Employee fields.
      val empFrame = spark.createDataFrame(listOfEmployees)
      empFrame.printSchema()

      // Rename a column. Reuse empFrame instead of rebuilding the DataFrame
      // from scratch (the original called createDataFrame a second time).
      val empFrameWithRenamedColumns = empFrame.withColumnRenamed("id", "empId")
      empFrameWithRenamedColumns.printSchema()

      // Register as a temp view so it can be queried with SQL.
      empFrameWithRenamedColumns.createOrReplaceTempView("employeeTable")
      val sortedByNameEmployees = spark.sql("select * from employeeTable order by name desc")
      sortedByNameEmployees.show()

      // A DataFrame can also be created directly from a Seq of tuples.
      val mobiles = spark.createDataFrame(Seq((1, "Android"), (2, "iPhone")))
      mobiles.printSchema()
      mobiles.show()

      // Tuple columns default to _1, _2, ...; rename them for readability.
      val mobilesWithRenamedColumns = mobiles.withColumnRenamed("_1", "id").withColumnRenamed("_2", "mobile")
      mobilesWithRenamedColumns.printSchema()
      mobilesWithRenamedColumns.show()
    } finally {
      // Release the session's resources; the original never stopped it.
      spark.stop()
    }
  }
}

/**
  * Immutable employee record used as the row type for the example DataFrames.
  *
  * @param id   unique numeric identifier
  * @param name display name
  */
final case class Employee(id: Int, name: String)

